# some helper functions for the long-run equities graphs


# ratio function

udf_aggregateRatio <-
  function(data,
           numerator,
           denominator,
           varname = paste(numerator, denominator, sep = "/"),
           scale = 1,
           addCompanyNum = FALSE,
           addCompanyDen = FALSE)
  {
    # takes two variables and creates aggregate ratios, including by sector
    
    # add requested variables across share types
    
    if (addCompanyNum)
    {
      data <-
        data %>% group_by(RDate, Broad.Company.Name) %>%  mutate(!!as.name(numerator) := sum(get(numerator), na.rm =
                                                                                               T)) %>% ungroup()
    }
    
    if (addCompanyDen)
    {
      data <-
        data %>% group_by(RDate, Broad.Company.Name) %>%  mutate(!!as.name(denominator) := sum(get(denominator), na.rm =
                                                                                                 T)) %>% ungroup()
    }
    
    Sectors <- data %>%
      group_by(Dates, Broad.sector) %>%
      filter((!is.na(get(numerator)) & (!is.na(get(
        denominator
      ))))) %>%
      summarise(!!as.name(varname) := scale * sum(get(numerator)) / sum(get(denominator)))
    
    Total <- data %>%
      group_by(Dates) %>%
      filter((!is.na(get(numerator)) & (!is.na(get(
        denominator
      ))))) %>%
      summarise(!!as.name(varname) := scale * sum(get(numerator)) / sum(get(denominator))) %>%
      mutate(Broad.sector = "Total") %>%
      ungroup() %>%
      rbind(ungroup(Sectors)) %>%
      arrange(Dates)
    
    
    return(Total)
    
  }

# calculate price indices from company-level data

udf_price_index <- function(data, sector) {
  # this function takes a given sectoral subset and calculates a price index
  # the methodology is explained in the paper, but the main trick is to make sure it is unaffected by compositional change
  # that means include only companies that are in both periods
  
  if (sector == "Total") {
    sector <- c("Resources", "Financial", "Other")
  }
  
  data <- data %>%
    filter(Broad.sector %in% sector) %>%
    complete(Dates, nesting(Security, Broad.Company.Name)) %>% #make sure the lags are calculated correctly when companies are moving in and out of the index
    group_by(Security) %>%
    arrange(Dates) %>%
    mutate(
      company_gr = Market.Capitalisation / lag(Market.Capitalisation, 1),
      share_gr = Shares / lag(Shares, 1),
      share_gr = ifelse(share_gr > 1.9, share_gr, 1),
      company_gr = company_gr / share_gr,
      weight = lag(Market.Capitalisation, 1),
      weight = ifelse(is.na(weight), 0, weight)
    ) %>%
    group_by(Dates) %>%
    summarise(
      index_gr = weighted.mean(company_gr, weight, na.rm = T),
      index_gr = ifelse(is.na(index_gr), 1, index_gr)
    ) %>%
    mutate(Price_index = cumprod(index_gr)) %>%
    select(-index_gr) %>%
    mutate(Broad.sector = ifelse(length(sector) == 3, "Total", sector))
  
  return(data)
}



udf_pricecorrelations <- function(price_data, window) {
  # calculate rolling price correlations for a given set of share prices
  
  udf_pwCorrels <- function(y) {
    # small helper function for calculating pairwise correlations
    corMat <- cor(y, use = "pairwise.complete.obs")
    corMat[which(upper.tri(corMat, diag = TRUE))] <- NA
    
    return(mean(corMat, na.rm = T))
  }
  
  
  price_data <-
    spread(price_data, Security, Price) %>% as.data.frame()
  price_data <-
    zoo(price_data[, 2:ncol(price_data)], order.by = price_data[, "Dates"])
  price_change <-
    price_data / stats::lag(price_data, k = -1, na.pad = T) * 100 - 100
  
  n <- ncol(price_change)
  
  suppressWarnings(
    #you get a lot of 'the standard deviation is zero' warnings from this that don't matter
    price_correlations <-
      rollapplyr(price_change, window, by.column = FALSE, udf_pwCorrels) %>% fortify.zoo() %>% as_tibble() %>% rename(Dates = "Index", Correlations = ".")
  )
  
  return(price_correlations)
  
}



# calculate an equity risk premium from a safe asset yield, equity prices, dividend yields and a given rolling window

# helper function turning a bond yield into a total return index (this is used elsewhere too)
# assumes you sell the bond each year and buy a new one (therefore realising capital gain/loss)

udf_bondTR <- function(yield) {
  yield <- yield / 100
  yield <-
    ((lag(yield) * (1 - (1 + yield) ^ (-10)) / yield + 1 / (1 + yield) ^ 10) -
       1) + lag(yield) # (capital gain) + (coupon payments)
  yield <- 1 + yield
  yield[1] <- 1
  yield <- cumprod(yield)
  
  return(yield)
  
}

udf_ERP <-
  function(safe_asset_yield,
           equity_prices,
           dividend_yield,
           rolling_window) {
    # calculate total return on the safe asset, based on the yield
    safe_asset_TR <-  udf_bondTR(safe_asset_yield) %>%
      enframe(name = NULL) %>%
      mutate_all(function(x)
        (x / lag(x, rolling_window)) ^ (1 / rolling_window) * 100 - 100) # then calculate annualised return over a given period
    
    # calculate equity total returns
    equity_TR <-
      equity_prices / lag(equity_prices) + dividend_yield / 100
    equity_TR[1] <- 1
    equity_TR <- cumprod(equity_TR) %>%
      enframe(name = NULL) %>%
      mutate_all(function(x)
        (x / lag(x, rolling_window)) ^ (1 / rolling_window) * 100 - 100)
    
    # ERP = equity total return - return on safe assets
    return(unlist(equity_TR - safe_asset_TR, use.names = F))
    
  }

# two functions for calculating different measures of market concentration
udf_hhi <- function(data) {
  #calculate Herfindahl-Hirschmann index for each sector and the total
  group_by(data, Broad.sector, Dates) %>%
    summarise(hhi = sum((
      Market.Capitalisation / sum(Market.Capitalisation, na.rm = T)
    ) ^ 2, na.rm = T)) %>%
    select(Dates, hhi, Broad.sector) %>%
    ungroup() %>%
    rbind(data %>%
            group_by(Dates) %>%
            summarise(hhi = sum((Market.Capitalisation / sum(Market.Capitalisation, na.rm = T)) ^
                                  2, na.rm = T
            )) %>%
            mutate(Broad.sector = "Total"))
}

udf_topn <- function(data) {
  # calculate various decile shares by market capitalisation
  group_by(data, Dates) %>%
    mutate(capitalisation_group = floor((rank(
      -Market.Capitalisation
    ) - 1) / 10) * 10 + 10) %>%
    filter(capitalisation_group <= 100) %>%
    group_by(Dates, capitalisation_group) %>%
    summarise(Market.Capitalisation = sum(Market.Capitalisation, na.rm = T)) %>%
    mutate(Market.Capitalisation = Market.Capitalisation / sum(Market.Capitalisation, na.rm = T) * 100) %>%
    spread(capitalisation_group, Market.Capitalisation)
  
}

# calculate the growth rate of a series
# straightforward, but used in quite a few places...
udf_growthrate <- function(x) {
  x / lag(x) * 100 - 100
}